rm(list=ls())
source("Packages.R")
# install.packages("devtools")
# install.packages("mice")
# utils::install.packages("miceadds")
packages <- c("devtools", "dplyr", "fastDummies",
	      "foreign", "ggfortify", "ggplot2", 
	      "gmodels", "ggthemes", "haven", "lazyeval", 
	      "miceadds", "missForest", "mice", 
	      "multiwayvcov", "purrr", "readstata13", 
	      "stargazer", "tidyr", "stringr", 
	      "tibble", "VIM", "readr")

package_load(packages)
#1) Load merged dataset

load("Imputation_data.Rda") # load imputed dataset


# FUNCTIONS: to choose selected variables, remove cases where certain variables not asked (thus no imputation)

reg.fun = function(yvar, xvars, midval, x_factors){
	# load mi1 as single regression dataframe
	df.obj = mice::complete(midval, action = 'long', include = TRUE)
	# choose variables
	xvars = c(".imp", ".id", "village", "feeder", xvars)
	to_sel = c(yvar, xvars)
	# if any variables in these categories (where some observations excluded), flag
	x1 = to_sel[to_sel %in% c("ccratedifficulty", "ccratecost")]
	if(length(x1) == 0){
		# if not, then choose columns
		df.obj.selected.2= df.obj[, names(df.obj) %in% to_sel]
	} else{
		# if so, then choose columns AND filter out observation where not asked
		x1 = paste0(x1, "_not_asked") # create vector of `not asked` variables
		to_sel = c(to_sel, x1) # add them to variable selection
		df.obj.selected.1 = df.obj[, names(df.obj) %in% to_sel] # pull variables
		# create dataframe indicating where at least one 'not_asked" is 1
	
		sum_not_asked = df.obj.selected.1 %>% select(c(x1)) %>% 
			mutate_if(is.factor, funs(as.numeric(as.character(.)))) %>%
			rowSums() %>% as.numeric()
		sum_not_asked = ifelse(sum_not_asked > 0, 1, 0)
		
		# remove observations where at least one 'not_asked" is 1
		df.obj.selected.2 = df.obj.selected.1[sum_not_asked == 0,]
	}
	# ensure that dependent variable is numeric
	df.obj.selected.2[, names(df.obj.selected.2) == yvar] = 
		as.numeric(as.character(df.obj.selected.2[, names(df.obj.selected.2) == yvar]))
	
	if(missing(x_factors) == TRUE){
		change_names = names(df.obj.selected.2)[grepl("(trust)|(perceive)", names(df.obj.selected.2))]
		df.obj.selected.2 = df.obj.selected.2 %>% 
			mutate_at(vars(change_names), funs(. = as.numeric(as.character(.))))
	}

	if(c("rationcard") %in% names(df.obj.selected.2)){
		df.obj.selected.2 = df.obj.selected.2 %>% 
			mutate(rationcard_dum = ifelse(is.na(rationcard), NA, 
						       ifelse(rationcard == 0, 0, 1)))
	}

	mi.return = mice::as.mids(df.obj.selected.2)
	return(mi.return)}


regression = function(yvar, xvars, midval, include_feeders = FALSE){
	datlist = miceadds::mids2datlist(midval)
	df.obj = mice::complete(midval, action = 'long', include = TRUE)
	village.cluster = as.numeric(substring(datlist[[1]]$village, 1, 5))
	f1 = paste0("miceadds::lm.cluster(data = data, formula = ", yvar, " ~ ", paste(xvars, collapse = " + "), " + I(feeder)", ", cluster = village.cluster)")
	formval = paste0("mod = lapply(datlist, FUN = function(data){", f1, "})")
	eval(parse(text = formval))
	beta.out = lapply( mod, FUN = function(rr){coef(rr)})
	var.out = lapply( mod, FUN = function(rr){vcov(rr)})

	# Calculate r_squared
	
	r.sq = lapply(mod, FUN = function(rr){summary(rr$lm_res)$r.squared})
	analyses = as.mira(mod)$analyses
	m = length(analyses)
	adj.r2 = matrix(NA, nrow = m, ncol = 3, dimnames = list(seq_len(m), 
		    c("R^2", "Fisher trans F^2", "se()")))

	r2 = adj.r2

    for (i in seq_len(m)){
	    fit <- analyses[[i]] 
	    adj.r2[i, 1] = sqrt(summary(fit$lm_res)$adj.r.squared) 
	    adj.r2[i, 2] <- 0.5 * log((adj.r2[i, 1] + 1)/(1 - adj.r2[i, 1])) 
	    adj.r2[i, 3] <- 1/(length(summary(fit$lm_res)$residuals) - 3)

	    r2[i, 1] = sqrt(summary(fit$lm_res)$r.squared) 
	    r2[i, 2] <- 0.5 * log((r2[i, 1] + 1)/(1 - r2[i, 1])) 
	    r2[i, 3] <- 1/(length(summary(fit$lm_res)$residuals) - 3)
    }

    fit <- pool.scalar(r2[, 2], r2[, 3])
    fit.adj <- pool.scalar(adj.r2[, 2], adj.r2[, 3])

    qbar.adj <- fit.adj$qbar
    qbar <- fit$qbar

    table.adj <- array(((exp(2 * qbar.adj) - 1)/(1 + exp(2 * qbar.adj)))^2,
        dim = c(1, 4))
    
    table <- array(((exp(2 * qbar) - 1)/(1 + exp(2 * qbar)))^2,
        dim = c(1, 4))
    
    dimnames(table) = list("R^2", c("est", "lo 95", "hi 95", "fmi"))
    dimnames(table.adj) = list("adj R^2", c("est", "lo 95", "hi 95", "fmi"))
    
    table[, 2] <- ((exp(2 * (qbar - 1.96 * sqrt(fit$t))) - 1)/(1 + exp(2 * (qbar - 1.96 * sqrt(fit$t)))))^2
    table.adj[, 3] <- ((exp(2 * (qbar.adj + 1.96 * sqrt(fit.adj$t))) - 1)/
		       (1 + exp(2 * (qbar.adj + 1.96 * sqrt(fit.adj$t)))))^2

    table[, 4] <- fit$f
    table.adj[, 4] <- fit.adj$f 
    
    pooled.out = miceadds::pool_mi(qhat = beta.out, u=var.out) 
    if(include_feeders == FALSE){ 
	    name_vals = rownames(summary(pooled.out))[grepl("I(feeder)", rownames(summary(pooled.out)), fixed = TRUE) == FALSE] 
	    coef_vals = summary(pooled.out)$results[grepl("I(feeder)", rownames(summary(pooled.out)), fixed = TRUE) == FALSE] 
	    se_vals = summary(pooled.out)$se[grepl("I(feeder)", rownames(summary(pooled.out)), fixed = TRUE) == FALSE] 
	    p_vals = as.numeric(summary(pooled.out)$p[grepl("I(feeder)", rownames(summary(pooled.out)), fixed = TRUE) == FALSE]) 
	    out.val = data.frame(Variable = name_vals, 
				 Coefficient = coef_vals, 
				 Standard_Error = se_vals, 
				 pvalue = p_vals)
	   
	    return(list(out.val, table, table.adj)); print(f1)
	}
	else{return(list(pooled.out, table, table.adj)); print(f1)}}


# REGRESSIONS 

# ***With controls
# *Without connection cost


# eststo: reg received i.feeder treatment rationcard econ2 econ3, cluster(village)
yvar = c("applied")
xvars = c("treatment_dum", "rationcard", "econ2", "econ3")
mi.applied = reg.fun(yvar, xvars, midval = mi1,  x_factors = c("rationcard", "econ2", "econ3", "treatment_dum"))
xvars = c("treatment_dum", "rationcard_dum", "econ2", "econ3")
reg.applied = regression(yvar = yvar, xvars = xvars, midval = mi.applied)
yvar = c()
xvars = c()


# eststo: reg perceivedifficulty i.feeder treatment rationcard econ2 econ3, cluster(village)
yvar = c("perceivedifficulty")
xvars = c("treatment_dum", "rationcard", "econ2", "econ3")
mi.pd = reg.fun(yvar, xvars, midval = mi1, 
		x_factors = c("rationcard", "econ2", "econ3", "treatment_dum"))

xvars = c("treatment_dum", "rationcard_dum", "econ2", "econ3")
reg.pd = regression(yvar = yvar, xvars = xvars, midval = mi.pd)
yvar = c()
xvars = c()


# eststo: reg perceivedifficulty i.feeder treatment rationcard econ2 econ3, cluster(village)
yvar = c("received")
xvars = c("treatment_dum", "rationcard", "econ2", "econ3")
mi.rec = reg.fun(yvar, xvars, midval = mi1,  
		 x_factors = c("rationcard", "econ2", "econ3", "treatment_dum"))
xvars = c("treatment_dum", "rationcard_dum", "econ2", "econ3")
reg.rec = regression(yvar = yvar, xvars = xvars, midval = mi.rec)
yvar = c()
xvars = c()

# eststo: reg perceivecost i.feeder treatment rationcard econ2 econ3, cluster(village)
yvar = c("perceivecost")
xvars = c("treatment_dum", "rationcard", "econ2", "econ3")
mi.pc = reg.fun(yvar, xvars, midval = mi1, 
		x_factors = c("rationcard", "econ2", "econ3", "treatment_dum"))
xvars = c("treatment_dum", "rationcard_dum", "econ2", "econ3")
reg.pc = regression(yvar = yvar, xvars = xvars, midval = mi.pc)
yvar = c()
xvars = c()


Combined.A10 = rbind(reg.applied[[1]] %>% mutate(DV = "Applied for Connection"), 
		    reg.pd[[1]] %>% mutate(DV = "Perceived Ease"), 
		    reg.pc[[1]] %>% mutate(DV = "Perceived Affordability"), 
		    reg.rec[[1]] %>% mutate(DV = "Received Connection"))


Table.A10 = Combined.A10 %>% 
	mutate_at(vars(Coefficient, Standard_Error), funs(round(., 4))) %>%
	mutate(pvalue = ifelse(pvalue <= 0.001, "***",
			       ifelse(pvalue <= 0.01, "**", 
				      ifelse(pvalue <= 0.05, "*",
					     ifelse(pvalue <= 0.1, "+", ""))))) %>%
	mutate(Coefficient = paste0(as.character(Coefficient), pvalue)) %>% 
	select(-pvalue) %>% gather("Variable_Value", "Statistic", Coefficient, Standard_Error) %>%
	spread(DV, Statistic) %>% mutate(Variable = factor(Variable, levels = c("treatment_dum1", "rationcard_dum", "econ21", "econ31", "(Intercept)"))) %>% arrange(Variable, Variable_Value) %>%
	mutate(Variable = ifelse(Variable_Value == "Standard_Error", "", as.character(Variable))) %>%
	mutate_at(vars(3:6), funs(ifelse(Variable_Value == "Standard_Error", 
					 paste0("(", as.character(.), ")"),
					 .))) %>% select(-Variable_Value) %>%
	mutate(Variable = ifelse(Variable == "econ21", "Can't Save", 
				 ifelse(Variable == "econ31", "Can Save", 
					ifelse(Variable == "rationcard_dum", "Ration Card", 
					       ifelse(Variable == "treatment_dum1", "Campaign", 
						      ifelse(Variable == "(Intercept)", "Constant", "")))))) %>%
	select(Variable, `Applied for Connection`, `Received Connection`, `Perceived Ease`, `Perceived Affordability`) 


reg.applied[[2]]
reg.applied[[3]]

reg.pd[[2]]
reg.pd[[3]]

reg.pc[[2]]
reg.pc[[3]]

reg.rec[[2]]
reg.rec[[3]]


save(Table.A10, file = "Table.A10.Imputation.Rda")
save(reg.applied, reg.pd, reg.pc, reg.rec, file = "Table.A10.Components.RData")



